############### ###############
## 04 - DM preparation for analysis
## Project: CBO
## Written by: Kamil Kouhen & Malte Lierl
## Purpose: Making last adjustments and adding useful variables before analysis
## Date of creation: 10/03/2022
############### ###############

library(here)
#Running preceding code
#source(here("Code","Rcode", "03 - preparation - DM cleaning.R"), echo = T) #Cleaning of CBO group ds

### Need to remove municipalities that are not included in the study ###

## Remove when Scorecards not distributed and/or municipal training not performed
# Read the intermediate DM dataset and keep only the rows whose municipality
# indicator Municipality_included_MON equals 1 (rows with NA are dropped too).
DM_final <- here(datatype, "Intermediate", "DM_intermediate.RDS") %>%
  readRDS() %>%
  filter(Municipality_included_MON == 1)

### Weighting observations by number of DMs interviewed in each municipality and for each type of respondent  ###

  #Note# Different numbers of DMs were interviewed in each municipality, we are creating frequency weights to take this into account

# DM_weight = 1 / (number of rows, i.e. interviewed DMs, sharing the same commune).
DM_final %<>%
  group_by(commune) %>%
  mutate(DM_weight = 1 / n()) %>%
  ungroup() %>%
  select(-contains("todrop_")) # still drops any temporary todrop_* column

### For Hypothesis 1: A financial stake in municipal government performance increases CBOs’ involvement in municipal governance. ###

  #Note# DM data is useful for the perceived influence dimension of hypothesis 1

##First pulling together variables that were disaggregated because randomly asked (know and interaction vars)

#"Knowledge and Interaction with municipal cbos" section was repeated randomly, need to pull variables together
# Merge the two randomly-asked copies of the "know" and "appreciation"
# questions for item `n` into single columns.
#
# Arguments:
#   df  data frame containing know<n>_1, know<n>_2, appreciation<n>_1 and
#       appreciation<n>_2
#   n   item number used to build the column names
#
# Returns `df` with two factor columns added (or overwritten): know<n> and
# appreciation<n>. For each row the value of the "_1" column is used, falling
# back to the "_2" column when "_1" is NA.
#
# Stops with an explicit message when one of the source columns is missing.
pullingthemtogether <- function(df, n) {
  merge_pair <- function(stem) {
    first_name <- paste0(stem, n, "_1")
    second_name <- paste0(stem, n, "_2")

    absent <- setdiff(c(first_name, second_name), names(df))
    if (length(absent) > 0) {
      stop("Missing column(s): ", paste(absent, collapse = ", "), call. = FALSE)
    }

    # Columns are looked up by name with [[ ]] instead of eval(parse(text = ...))
    first <- as.character(df[[first_name]])
    second <- as.character(df[[second_name]])
    as.factor(ifelse(is.na(first), second, first))
  }

  df[[paste0("know", n)]] <- merge_pair("know")
  df[[paste0("appreciation", n)]] <- merge_pair("appreciation")
  df
}

# Merge the split columns for items 1 to 10, checking after each call that the
# merged know<i> and appreciation<i> columns are present.
for (i in seq_len(10)) {
  DM_final <- pullingthemtogether(DM_final, i)
  if (!(paste0("know", i) %in% colnames(DM_final))) {
    stop("Function did not work")
  }
  if (!(paste0("appreciation", i) %in% colnames(DM_final))) {
    stop("Function for appreciation vars did not work")
  }
}

# The helper is only needed for the loop above
rm(pullingthemtogether)

#If CBO is not known by DM (know1 or know6 == 0), 0 for the other variables (instead of NA)
# know6 gates the simulated control CBO items (know6-know10) and know1 gates the
# simulated treatment CBO items (know1-know5): wherever the gate equals 0 the
# item is set to 0, otherwise it keeps its own value. Every item is rebuilt as a
# factor from its character values.
DM_final %<>%
  mutate(
    # Simulated control CBO
    across(
      all_of(paste0("know", 10:6)),
      ~ as.factor(as.character(ifelse(know6 == 0, 0, as.character(.x))))
    ),
    # Simulated treatment CBO
    across(
      all_of(paste0("know", 5:1)),
      ~ as.factor(as.character(ifelse(know1 == 0, 0, as.character(.x))))
    )
  )

#Checking NAs in cbo_* variables
# Diagnostics only: nothing below modifies DM_final.
# Summary statistics (project helper `sumstats`) for every column whose name contains "know"
sumstats(DM_final[colnames(DM_final %>% dplyr::select(contains("know")))]) #A few NAs left

# Same summary for id_DM, know1, know6 and the cbo_* columns:
# first restricted to rows with know1 == 1 and know6 == 1, with "Don't Know" recoded to NA,
# then on all rows without recoding.
sumstats((DM_final %>% filter(know1 == 1 & know6 == 1) %>% na_if("Don't Know"))[DM_final %>% dplyr::select(id_DM, know1, know6, contains("cbo_")) %>% colnames])
sumstats((DM_final)[DM_final %>% dplyr::select(id_DM, know1, know6, contains("cbo_")) %>% colnames])

  #Note# Very few NAs in know1 and know6 (DM knows CBO or not): 3 and 1 respectively.
       # However, if a DM does not know one of the CBOs, they cannot answer the cbo_* questions
       # comparing the two CBOs. We are left with 30% of DMs who did not answer the cbo_* questions (NAs)

# Share of communes with at least one DM who knows both CBOs
nrow(DM_final %>% filter(know1 == 1 & know6 == 1) %>% distinct(commune)) / #Communes with at least one row where know1 == 1 and know6 == 1
  nrow(DM_final %>% distinct(commune)) #All communes in DM_final

## For the next two variables, we are creating DM x CBO combinations (reshaping the dataset to 2 obs per DM so that indicator variables associated with a single CBO can be used)
# Stack two copies of DM_final: the first copy is tagged as the treated-CBO
# occurrence (simu_combi_DMxCBO_TREAT = 1, an arbitrary convention) and the
# second as the control-CBO occurrence (= 0). The tag is moved to the first
# column and stored as a factor; appcode takes appcode_treat or appcode_ctrl
# according to the tag.
DM_finalx2 <- DM_final %>%
  mutate(simu_combi_DMxCBO_TREAT = 1) %>%
  rbind(DM_final %>% mutate(simu_combi_DMxCBO_TREAT = 0)) %>%
  dplyr::select(simu_combi_DMxCBO_TREAT, everything()) %>%
  mutate(
    simu_combi_DMxCBO_TREAT = as.factor(as.character(simu_combi_DMxCBO_TREAT)),
    appcode = ifelse(simu_combi_DMxCBO_TREAT == 0, appcode_ctrl, appcode_treat) #Blinded id of the CBO the row refers to
  )

#Note# Use id_DM and simu_combi_DMxCBO_TREAT to identify the blinded DM and CBO combination (level of observations)

## Creating A_DM_know_and_inter: Breadth of DMs' knowledge and interaction with CBOs
# A_DM_know_and_inter: sum of four know* items for the CBO the row refers to
# (know1-know4 when simu_combi_DMxCBO_TREAT == "1", know6-know9 otherwise).
#
# The sum is computed with vectorised addition rather than rowwise() + sum():
# the result is the same (NA as soon as one component is NA), but DM_finalx2 is
# no longer left as a rowwise data frame for the following steps and for the
# saved file.
DM_finalx2 %<>%
  mutate(
    todrop_know_knows = as.numeric(ifelse(as.character(simu_combi_DMxCBO_TREAT) == "1", as.character(know1), as.character(know6))),
    todrop_know_member = as.numeric(ifelse(as.character(simu_combi_DMxCBO_TREAT) == "1", as.character(know2), as.character(know7))),
    todrop_know_approach_muni = as.numeric(ifelse(as.character(simu_combi_DMxCBO_TREAT) == "1", as.character(know3), as.character(know8))),
    todrop_know_approach_DM = as.numeric(ifelse(as.character(simu_combi_DMxCBO_TREAT) == "1", as.character(know4), as.character(know9))),
    A_DM_know_and_inter = todrop_know_knows +
      todrop_know_member +
      todrop_know_approach_muni +
      todrop_know_approach_DM
  ) %>%
  select(!contains("todrop_")) %>% # temporary components are not kept
  set_variable_labels(A_DM_know_and_inter = "Breadth of DMs' knowledge and intereaction with CBOs")

  #Note# For estimation, use simu_combi_DMxCBO_TREAT as the regressor for A_DM_know_and_inter:
       # the outcome is built from the treated CBO's items when simu_combi_DMxCBO_TREAT == 1
       # and from the control CBO's items when simu_combi_DMxCBO_TREAT == 0

reporting.changes(DM_finalx2, #Ad-hoc to report adding the variable into here("Output", "For Cleaning", "Changes made to datasets.xlsx")
                  "A_DM_know_and_inter", 
                  describe = "New variable: Breadth of DMs' knowledge and interaction with CBOs (B101-B104)")

## Creating A_DM_freqinter: Estimated frequency of interaction between DM and CBO (no log transformation is applied here)
# Frequency item of the CBO the row refers to: know5 for the treated-CBO rows,
# know10 for the control-CBO rows, converted to numeric through character.
DM_finalx2 <- mutate(
  DM_finalx2,
  A_DM_freqinter = as.numeric(
    ifelse(
      as.character(simu_combi_DMxCBO_TREAT) == "1",
      as.character(know5),
      as.character(know10)
    )
  )
)

  #Note# Taking the variable as continuous but original categories are the following: 
          # 0 - Never
          # 0.5 - Once in two years or less often 
          # 1 - About once per year
          # 2 - About twice per year
          # 4 - About once per quarter 
          # 8 - About twice per quarter
          # 16 - One to two times a month
          # 32 - Weekly or almost weekly
          # 64 - Several times a week

## Creating A_cbo_influence_mean_weighted: Mean of DM's positive opinion for treated CBO (1 for T, -1, for C), weighted by nb of DM itwed in commune ##
# Temporary helper (removed below): +weight when the treated CBO was chosen,
# -weight when the control CBO was chosen, NA for any other answer.
signed_weight <- function(choice, weight) {
  ifelse(choice == "${cbo_treat}", weight, ifelse(choice == "${cbo_ctrl}", -weight, NA))
}

DM_final %<>%
  mutate(
    # One signed, DM_weight-scaled score per comparison question
    A_cbo_big = signed_weight(cbo_big, DM_weight),
    A_cbo_gov = signed_weight(cbo_gov, DM_weight),
    A_cbo_influent = signed_weight(cbo_influent, DM_weight),
    A_cbo_power = signed_weight(cbo_power, DM_weight),
    A_cbo_organized = signed_weight(cbo_organized, DM_weight)
  ) %>%
  rowwise() %>%
  mutate(
    # Row mean of the five scores (NA as soon as one score is NA)
    A_cbo_influence_mean = mean(c(A_cbo_big, A_cbo_gov, A_cbo_influent, A_cbo_power, A_cbo_organized)),
    # The scores already carry DM_weight, so the weighted variable is a plain copy
    A_cbo_influence_mean_weighted = A_cbo_influence_mean
  ) %>%
  ungroup() %>%
  select(!contains("todrop_")) %>%
  #Labelling the new variable
  set_variable_labels(A_cbo_influence_mean_weighted = "Mean of decision makers' comparative perception of influence between treated and control CBOS (1 for T, -1, for C), weighted by the number of interviewed decision makers in each municipality (see list in 'variable classification.xlsx')")

rm(signed_weight)

reporting.changes(DM_final, #Ad-hoc to report adding the variable into here("Output", "For Cleaning", "Changes made to datasets.xlsx")
                  "A_cbo_influence_mean_weighted", 
                  describe = "New variable:Mean of decision makers' comparative perception of influence between treated and control CBOS (1 for T, -1, for C), weighted by the number of interviewed decision makers in each municipality, (see list in 'variable classification.xlsx')")

### For Hypothesis 3: The CBO incentive scheme causes municipal decision makers to perceive greater accountability pressure.###

## Creating A_bin_decideurs_important: Per the decision makers, community groups and civil society are among the three most important actors to keep happy, in order to maintain influence
# A_bin_decideurs_important: 1 when code 11 appears among the first, second or
# third "important" answers; 0 when it does not and at least one of the three
# answers is non-missing; NA when all three answers are missing.
DM_final %<>%
  mutate(
    # Step 1: 1 / 0 / NA straight from the comparison (NA when undecidable)
    A_bin_decideurs_important = ifelse(
      decideur_important_first == 11 |
        decideur_important_second == 11 |
        decideur_important_third == 11,
      1,
      0
    ),
    # Step 2: an undecided row becomes 0 unless all three answers are missing
    A_bin_decideurs_important = ifelse(
      is.na(A_bin_decideurs_important) &
        !(is.na(decideur_important_first) &
          is.na(decideur_important_second) &
          is.na(decideur_important_third)),
      0,
      A_bin_decideurs_important
    )
  ) %>%
  set_variable_labels(A_bin_decideurs_important = "Per the decision makers, community groups and civil society are among the three most important actors to keep happy, in order to maintain influence")

reporting.changes(DM_final, #Ad-hoc to report adding the variable into here("Output", "For Cleaning", "Changes made to datasets.xlsx")
                  "A_bin_decideurs_important", 
                  describe = "New variable: Per the decision makers, community groups and civil society are among the three most important actors to keep happy, in order to maintain influence")

## Creating A_bin_decideurs_justify: Per the decision makers, community groups and civil society are among the three actors to which they spend the most effort justifying their decisions
# A_bin_decideurs_justify: 1 when code 11 appears among the first, second or
# third "justify" answers; 0 when it does not and at least one of the three
# answers is non-missing; NA when all three answers are missing.
DM_final %<>%
  mutate(
    # Step 1: 1 / 0 / NA straight from the comparison (NA when undecidable)
    A_bin_decideurs_justify = ifelse(
      decideur_justify_first == 11 |
        decideur_justify_second == 11 |
        decideur_justify_third == 11,
      1,
      0
    ),
    # Step 2: an undecided row becomes 0 unless all three answers are missing
    A_bin_decideurs_justify = ifelse(
      is.na(A_bin_decideurs_justify) &
        !(is.na(decideur_justify_first) &
          is.na(decideur_justify_second) &
          is.na(decideur_justify_third)),
      0,
      A_bin_decideurs_justify
    )
  ) %>%
  set_variable_labels(A_bin_decideurs_justify = "Per the decision makers, community groups and civil society are among the three actors to which they spend the most effort justifying their decisions")

reporting.changes(DM_final, #Ad-hoc to report adding the variable into here("Output", "For Cleaning", "Changes made to datasets.xlsx")
                  "A_bin_decideurs_justify", 
                  describe = "New variable: Per the decision makers, community groups and civil society are among the three actors to which they spend the most effort justifying their decisions")

## Using pressure_civilsociety: Per the decision makers, the extent to which they anticipate civil society pressure if their municipality fails to deliver on its responsibilities without good justification.
  #Using original variable

### For Hypothesis 4: The CBO incentive scheme increases municipal decision makers' awareness of performance shortfalls. ###

## Creating A_breadth_awareness_sm_indicators: Number of SUPERMUN indicators Decision Makers were able to correctly recall from memory
DM_final %<>%
  ### Converting the 16 indicator columns to numeric (through character) so they can be summed
  mutate(
    across(
      all_of(c(paste0("supermun_indic_", 1:9), paste0("supermun_indic_cap_", 1:7))),
      ~ as.numeric(as.character(.x))
    )
  ) %>%

  ### Row-by-row sum of the 16 columns; NA as soon as one of them is NA
  rowwise() %>%
  mutate(
    A_breadth_awareness_sm_indicators = sum(
      c_across(all_of(c(paste0("supermun_indic_", 1:9), paste0("supermun_indic_cap_", 1:7)))),
      na.rm = FALSE
    )
  ) %>%
  ungroup() %>%

  ### The sum is kept only when both "__888" columns are non-missing, otherwise it is set to NA
  mutate(
    A_breadth_awareness_sm_indicators = ifelse(
      !is.na(supermun_indic__888) & !is.na(supermun_indic_cap__888),
      A_breadth_awareness_sm_indicators,
      NA
    )
  )

  ### Making sure it worked: exactly 4 missing values are expected
  if (sum(is.na(DM_final$A_breadth_awareness_sm_indicators)) != 4) {
    stop("There is something wrong with the number of NA here, please check")
  }

  ### The number of zero sums must equal the number of rows where both "__888" columns equal 1
  if (
    sum(DM_final$A_breadth_awareness_sm_indicators == 0, na.rm = TRUE) !=
      sum(DM_final$supermun_indic__888 == 1 & DM_final$supermun_indic_cap__888 == 1, na.rm = TRUE)
  ) {
    stop("There is something wrong with the number of zero values here, please check.")
  }

## A_abs_deviation_relative_perf: sum of absolute deviations between decision makers' perception of and actual muni. performance quintile, across all groups of indicators in 2018

  #Note# Using 2018 as reference year

  ## Temporary helper (removed below): maps the two perception questions of a
  ## domain to a quintile from 1 to 5. `outer` settles the bottom (1) and top (5)
  ## quintiles; otherwise `inner` places the answer in the middle 60% (2, 3 or 4).
  ## Rows matching no condition get NA.
  perceived_quintile <- function(outer, inner) {
    case_when(
      outer == "bottom" ~ 1,
      outer == "top" ~ 5,
      inner == "bottom" ~ 2,
      inner == "middle" ~ 3,
      inner == "top" ~ 4
    )
  }

  DM_final %<>%

    ## Absolute deviation between perceived and actual (2018) quintile, per domain
    mutate(
      A_abs_deviation_relative_perf_educ =
        abs(perceived_quintile(educ_percep, educ_high_percep) - stars_school2018_S),
      A_abs_deviation_relative_perf_water =
        abs(perceived_quintile(watersan_percep, watersan_high_percep) - stars_water_access2018_S),
      A_abs_deviation_relative_perf_health =
        abs(perceived_quintile(health_percep, health_high_percep) - stars_health2018_S),
      A_abs_deviation_relative_perf_adminserv =
        abs(perceived_quintile(adminserv_percep, adminserv_high_percep) - stars_services2018_S),
      A_abs_deviation_relative_perf_capacity =
        abs(perceived_quintile(capacity_percep, capacity_high_percep) - stars_total_ic2018_S)
    ) %>%

    ## Sum of the five absolute deviations, row by row (NA as soon as one is NA)
    rowwise() %>%
    mutate(
      A_abs_deviation_relative_perf = sum(
        A_abs_deviation_relative_perf_educ,
        A_abs_deviation_relative_perf_water,
        A_abs_deviation_relative_perf_health,
        A_abs_deviation_relative_perf_adminserv,
        A_abs_deviation_relative_perf_capacity,
        na.rm = FALSE
      )
    ) %>%
    ungroup() %>%
    select(!contains("todrop_")) %>%
    set_variable_labels(A_abs_deviation_relative_perf = "sum of absolute deviations between decision makers' perception of and 
                        actual muni. performance quintile, across all groups of indicators in 2018")

  rm(perceived_quintile)
  
##########################
# Write both final datasets (one row per DM, and one row per DM x CBO) to the "Final" folder
saveRDS(
  object = DM_final,
  file = here(datatype, "Final", "DM_final.RDS")
)
saveRDS(
  object = DM_finalx2,
  file = here(datatype, "Final", "DM_finalx2.RDS")
)

message("**04 completed")









